options(warn=-1)
library(data.table)
suppressMessages(library(limma))
suppressMessages(library(ENmix))
suppressMessages(library(sva))
suppressMessages(library(ChAMP))
options(warn=0)
# Let's look at the effect of sex
plotMDS(beta, top=10000, gene.selection="common",
pch=17,col=c("deeppink","blue")[factor(pheno$sex)],
dim=c(1,2),cex=1.5)
legend("center", legend=levels(factor(pheno$sex)),bty='n',
cex=1.5,pch=17,col=c("deeppink","blue"))
# Let's look at the effect of sex but let's remove the X and Y chromosomes
betas.clean = beta[manifest[probe_type=="cg" & !chr %in% c("X","Y")]$index,]
plotMDS(betas.clean, top=10000, gene.selection="common",
pch=17,col=c("deeppink","blue")[factor(pheno$sex)],
dim=c(1,2),cex=1.5)
legend("center", legend=levels(factor(pheno$sex)),bty='n',
cex=1.5,pch=17,col=c("deeppink","blue"))
# Let's look at the effect of technical variables
# Row
cols <- rainbow(n = length(levels(factor(pheno$row))))
plotMDS(betas.clean, top=10000, gene.selection="common",
pch=17,col=cols[factor(pheno$row)],
dim=c(1,2),cex=1.5)
legend("right", legend=levels(factor(pheno$row)),bty='n',
cex=1.5,pch=17,col=cols)
# If clustering by technical variables is present, batch effects can be adjusted for using ComBat or using covariates for batch in downstream analyses.
# Example of ComBat
# Impute missing Beta-values (ComBat will produce an error with missingness)
# sum(is.na(betas.clean))
# betas.impute = champ.impute(beta=betas.clean, pd=pheno, k=5, ProbeCutoff=0.2, SampleCutoff=0.1)
# betas.impute = betas.impute$beta
# Run ComBat
# batch <- factor(pheno$Sentrix_ID)
# modcombat <- model.matrix(~1, data = pheno)
# betaCombat <- ComBat(dat = as.matrix(betas.impute), batch = batch, mod = modcombat)
It would be useful to look at several traits with global variability
cov<-data.frame(pheno[,c('smoker','sex','CD4','CD8','NK','MO','GR','B')])
npc <- 20 # Top 20 PCs
svd <- prcomp(t(na.omit(betas.clean)))
screeplot(svd, npc, type = "barplot")
eigenvalue <- svd[["sdev"]]^2
prop <- (sum(eigenvalue[1:npc])/sum(eigenvalue)) * 100
cat("Top ", npc, " principal components can explain ",
prop, "% of data \n variation", "\n")
## Top 20 principal components can explain 81.99883 % of data
## variation
pcrplot(na.omit(betas.clean), cov, npc=10) # Already saved in working directory
## Analysis is running, please wait...!
## svdscreeplot.jpg was plotted
## Top 10 principal components can explain 68.58916 % of data
## variation
## pcr_diag.jpg was plotted
Load package for Age-Prediction
options(warn=-1)
suppressMessages(library(wateRmelon))
options(warn=0)
DNAmAge<-agep(beta)$horvath.age
hist(DNAmAge)
boxplot(DNAmAge);stripchart(DNAmAge, vertical = T,method = "jitter", add = T, pch = 20, col = 'red')
Agreement between Horvath’s Epigenetic age and Hannum’s clock
data(hannumCoef)
length(hannumCoef)
## [1] 71
DNAmAge.Hannum<-agep(beta,coeff=hannumCoef,method = "hannum")$custom_age
Correlation; agreement
cor.test(DNAmAge,DNAmAge.Hannum)
##
## Pearson's product-moment correlation
##
## data: DNAmAge and DNAmAge.Hannum
## t = 13.433, df = 28, p-value = 9.949e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8576226 0.9666606
## sample estimates:
## cor
## 0.9304164
plot(DNAmAge.Hannum,DNAmAge,pch=21,ylab="Hortvath's DNAm Age",
xlab="Hannum's DNAm Age",cex=1.2, bg=alpha("deepskyblue",0.45),main="Epigenetic Clocks")
legend("topleft",legend=paste0("r=",round(cor(DNAmAge.Hannum,DNAmAge),2)),bty="n")
abline(lm(DNAmAge~DNAmAge.Hannum),col="red",lw=2)
Age Acceleration Residuals
AgeAccelerationResidual<-residuals(lm(DNAmAge.Hannum~DNAmAge))
boxplot(AgeAccelerationResidual ~pheno$smoker, col=c("blue","red"))
wilcox.test(AgeAccelerationResidual ~ pheno$smoker)
##
## Wilcoxon rank sum exact test
##
## data: AgeAccelerationResidual by pheno$smoker
## W = 91, p-value = 0.3892
## alternative hypothesis: true location shift is not equal to 0
Differences by Smoking status
boxplot(DNAmAge ~pheno$smoker, col=c("blue","red"))
wilcox.test(DNAmAge ~ pheno$smoker)
##
## Wilcoxon rank sum exact test
##
## data: DNAmAge by pheno$smoker
## W = 102, p-value = 0.6827
## alternative hypothesis: true location shift is not equal to 0
boxplot(DNAmAge.Hannum ~pheno$smoker, col=c("blue","red"))
wilcox.test(DNAmAge.Hannum ~ pheno$smoker)
##
## Wilcoxon rank sum exact test
##
## data: DNAmAge.Hannum by pheno$smoker
## W = 93, p-value = 0.4363
## alternative hypothesis: true location shift is not equal to 0
Load Age Acceleration measures See Horvath New Methylation Age Calculator.
# Online<-read.csv("Clock/datout_New.output.csv")
Online<- Online[match(pheno$gsm, Online$SampleID),]
group <- NA
group[pheno$smoker=="smoker"] <- 1
group[pheno$smoker=="non-smoker"] <- 2
pairs(~DNAmAge + DNAmPhenoAge + DNAmAgeSkinBloodClock +
DNAmGrimAge + DNAmTL + DNAmAgeHannum,
col = c("red","blue")[group],
pch = c(8, 18)[group],
data = Online)
Look at GrimAge Acceleration
boxplot(Online$DNAmGrimAgeAdjAge ~pheno$smoker, col=c("blue","red"))
wilcox.test(Online$DNAmGrimAgeAdjAge ~ pheno$smoker)
##
## Wilcoxon rank sum exact test
##
## data: Online$DNAmGrimAgeAdjAge by pheno$smoker
## W = 2, p-value = 5.157e-08
## alternative hypothesis: true location shift is not equal to 0
Pack years predicted from DNA methylation
boxplot(Online$DNAmPACKYRS ~pheno$smoker, col=c("blue","red"))
wilcox.test(Online$DNAmPACKYRS ~ pheno$smoker)
##
## Wilcoxon rank sum exact test
##
## data: Online$DNAmPACKYRS by pheno$smoker
## W = 3, p-value = 9.025e-08
## alternative hypothesis: true location shift is not equal to 0
DNA methylation estimate of TL
boxplot(Online$DNAmTLAdjAge ~pheno$smoker, col=c("blue","red"))
t.test(Online$DNAmTLAdjAge ~ pheno$smoker)
##
## Welch Two Sample t-test
##
## data: Online$DNAmTLAdjAge by pheno$smoker
## t = 3.5366, df = 27.251, p-value = 0.001473
## alternative hypothesis: true difference in means between group non-smoker and group smoker is not equal to 0
## 95 percent confidence interval:
## 0.08360011 0.31441308
## sample estimates:
## mean in group non-smoker mean in group smoker
## 0.0995033 -0.0995033
End of script 03